All packages used to for analysis and figures in this page:
library(tidyverse)
library(knitr)
library(kableExtra)
library(plotly)
library(DT)
library(ggcorrplot)
library(psych)
library(GGally)
library(gridExtra)
library(factoextra)
library(FactoMineR)
# remotes::install_github("LCBC-UiO/ggseg")
#
# If that doesn't work:
#
# download.file("https://github.com/LCBC-UiO/ggseg/archive/master.zip", "ggseg.zip")
# unzip("ggseg.zip")
# devtools::install_local("ggseg-master")
library(ggseg)
# remotes::install_github("LCBC-UiO/ggseg3d")
library(ggseg3d)
# remotes::install_github("LCBC-UiO/ggsegExtra")
library(ggsegExtra)
First, I’ll load the partial volume-corrected regional tau-PET data from ADNI. For more info on this dataset, please see Data Understanding and Acknowledgments.
tau.df <- read.csv("../../ADNI_Data/Raw_Data/UCBERKELEYAV1451_PVC_05_12_20.csv", stringsAsFactors = T)
tau.df$EXAMDATE <- as.Date(as.character(tau.df$EXAMDATE), format="%m/%d/%Y")
str:
str(tau.df)
## 'data.frame': 1120 obs. of 165 variables:
## $ RID : int 21 31 31 56 56 56 59 69 69 69 ...
## $ VISCODE : Factor w/ 9 levels "bl","init","tau",..: 2 2 8 2 8 9 2 2 8 9 ...
## $ VISCODE2 : Factor w/ 16 levels "","bl","m108",..: 7 7 8 7 8 9 7 7 8 9 ...
## $ EXAMDATE : Date, format: "2018-02-02" "2018-04-24" "2019-04-23" "2018-02-20" ...
## $ INFERIOR_CEREBGM_SUVR : num 1.32 1.33 1.33 1.28 1.24 ...
## $ INFERIOR_CEREBGM_VOLUME : int 52306 54296 54296 56750 56750 56750 59836 56862 56862 56862 ...
## $ HEMIWM_SUVR : num 1.02 0.85 0.866 1.138 1.196 ...
## $ HEMIWM_VOLUME : int 321220 281690 281690 336495 336495 336495 294422 463900 463900 463900 ...
## $ BRAAK12_SUVR : num 2.06 2.24 2.3 1.91 1.88 ...
## $ BRAAK12_VOLUME : int 10275 7587 7587 9376 9376 9376 10379 10981 10981 10981 ...
## $ BRAAK34_SUVR : num 1.95 1.87 1.8 1.82 1.77 ...
## $ BRAAK34_VOLUME : int 95661 95419 95419 92482 92482 92482 94092 112788 112788 112788 ...
## $ BRAAK56_SUVR : num 1.99 1.92 1.84 1.87 1.84 ...
## $ BRAAK56_VOLUME : int 284821 288136 288136 283119 283119 283119 283727 325054 325054 325054 ...
## $ BRAIN_STEM_SUVR : num 1.27 1.12 1.12 1.2 1.17 ...
## $ BRAIN_STEM_VOLUME : int 16955 16952 16952 20508 20508 20492 18057 18872 18872 18866 ...
## $ LEFT_MIDDLEFR_SUVR : num 2.02 1.93 1.8 1.83 1.78 ...
## $ LEFT_MIDDLEFR_VOLUME : int 17640 18517 18517 17164 17164 17164 17683 21907 21907 21907 ...
## $ LEFT_ORBITOFR_SUVR : num 2.17 2.03 1.92 2.11 1.98 ...
## $ LEFT_ORBITOFR_VOLUME : int 11676 10091 10091 11721 11721 11721 10917 12109 12109 12109 ...
## $ LEFT_PARSFR_SUVR : num 2.02 2.01 1.98 2.03 1.99 ...
## $ LEFT_PARSFR_VOLUME : int 9201 7799 7799 9185 9185 9185 7709 9813 9813 9813 ...
## $ LEFT_ACCUMBENS_AREA_SUVR : num 1.14 1.04 1.79 1.12 1.18 ...
## $ LEFT_ACCUMBENS_AREA_VOLUME : int 500 318 318 308 308 308 353 361 361 361 ...
## $ LEFT_AMYGDALA_SUVR : num 1.31 1.54 1.63 1.42 1.37 ...
## $ LEFT_AMYGDALA_VOLUME : int 1367 1224 1224 1561 1561 1561 993 1499 1499 1499 ...
## $ LEFT_CAUDATE_SUVR : num 2.08 1.46 1.34 1.95 1.83 ...
## $ LEFT_CAUDATE_VOLUME : int 3016 4890 4890 3083 3083 3083 2874 4049 4049 4049 ...
## $ LEFT_HIPPOCAMPUS_SUVR : num 2.12 1.96 2.2 1.69 1.73 ...
## $ LEFT_HIPPOCAMPUS_VOLUME : int 3822 3050 3050 3476 3476 3476 3603 3550 3550 3550 ...
## $ LEFT_PALLIDUM_SUVR : num 3.79 1.89 1.95 2.5 2.6 ...
## $ LEFT_PALLIDUM_VOLUME : int 444 2066 2066 1301 1301 1301 1081 1634 1634 1634 ...
## $ LEFT_PUTAMEN_SUVR : num 1.69 1.64 1.42 1.9 1.78 ...
## $ LEFT_PUTAMEN_VOLUME : int 4000 5675 5675 4832 4832 4832 3563 4891 4891 4891 ...
## $ LEFT_THALAMUS_PROPER_SUVR : num 1.45 1.32 1.24 1.54 1.53 ...
## $ LEFT_THALAMUS_PROPER_VOLUME : int 8226 6195 6195 7114 7114 7114 7561 7518 7518 7518 ...
## $ RIGHT_MIDDLEFR_SUVR : num 2.08 1.91 1.8 1.94 1.85 ...
## $ RIGHT_MIDDLEFR_VOLUME : int 17250 18440 18440 15605 15605 15605 16280 22586 22586 22586 ...
## $ RIGHT_ORBITOFR_SUVR : num 2.19 2.01 1.86 2.17 2.03 ...
## $ RIGHT_ORBITOFR_VOLUME : int 11614 12637 12637 11064 11064 11064 11537 12575 12575 12575 ...
## $ RIGHT_PARSFR_SUVR : num 2.17 2.08 1.9 2.09 2.01 ...
## $ RIGHT_PARSFR_VOLUME : int 9255 8131 8131 9641 9641 9641 8839 9119 9119 9119 ...
## $ RIGHT_ACCUMBENS_AREA_SUVR : num 1.41 1.65 1.66 1.01 1.07 ...
## $ RIGHT_ACCUMBENS_AREA_VOLUME : int 545 413 413 423 423 423 542 528 528 528 ...
## $ RIGHT_AMYGDALA_SUVR : num 1.18 1.79 1.89 1.37 1.44 ...
## $ RIGHT_AMYGDALA_VOLUME : int 1268 1028 1028 1464 1464 1464 1313 1797 1797 1797 ...
## $ RIGHT_CAUDATE_SUVR : num 2.01 1.57 1.37 1.96 1.89 ...
## $ RIGHT_CAUDATE_VOLUME : int 3179 4854 4854 2984 2984 2984 3224 3835 3835 3835 ...
## $ RIGHT_HIPPOCAMPUS_SUVR : num 2.01 2.09 2.03 1.62 1.64 ...
## $ RIGHT_HIPPOCAMPUS_VOLUME : int 3978 2723 2723 3489 3489 3489 3667 3942 3942 3942 ...
## $ RIGHT_PALLIDUM_SUVR : num 3.01 2.32 2.12 2.33 2.48 ...
## $ RIGHT_PALLIDUM_VOLUME : int 846 1531 1531 1262 1262 1262 1088 1552 1552 1552 ...
## $ RIGHT_PUTAMEN_SUVR : num 1.68 1.62 1.53 2.06 1.94 ...
## $ RIGHT_PUTAMEN_VOLUME : int 4322 5774 5774 4328 4328 4328 3190 4569 4569 4569 ...
## $ RIGHT_THALAMUS_PROPER_SUVR : num 1.42 1.33 1.24 1.52 1.55 ...
## $ RIGHT_THALAMUS_PROPER_VOLUME : int 5968 5442 5442 5940 5940 5940 6257 7899 7899 7899 ...
## $ CHOROID_SUVR : num 7.45 4.56 4.31 3.84 3.79 ...
## $ CHOROID_VOLUME : int 4180 3591 3591 3165 3165 3165 3717 3663 3663 3663 ...
## $ CTX_LH_BANKSSTS_SUVR : num 1.75 1.49 1.6 1.7 1.63 ...
## $ CTX_LH_BANKSSTS_VOLUME : int 1553 1633 1633 1812 1812 1812 1694 2601 2601 2601 ...
## $ CTX_LH_CAUDALANTERIORCINGULATE_SUVR : num 1.67 1.73 1.65 1.69 1.69 ...
## $ CTX_LH_CAUDALANTERIORCINGULATE_VOLUME : int 1138 1387 1387 1124 1124 1124 1465 1512 1512 1512 ...
## $ CTX_LH_CUNEUS_SUVR : num 2.33 2.2 2.05 2.01 2 ...
## $ CTX_LH_CUNEUS_VOLUME : int 2023 2702 2702 2429 2429 2429 2393 2222 2222 2222 ...
## $ CTX_LH_ENTORHINAL_SUVR : num 2.07 2.3 2.43 2.79 2.52 ...
## $ CTX_LH_ENTORHINAL_VOLUME : int 1468 1035 1035 1068 1068 1068 1297 1888 1888 1888 ...
## $ CTX_LH_FUSIFORM_SUVR : num 1.97 1.87 1.83 1.84 1.77 ...
## $ CTX_LH_FUSIFORM_VOLUME : int 7956 6997 6997 7694 7694 7694 7807 9083 9083 9083 ...
## $ CTX_LH_INFERIORPARIETAL_SUVR : num 1.99 1.95 1.94 1.85 1.89 ...
## $ CTX_LH_INFERIORPARIETAL_VOLUME : int 11656 10174 10174 9243 9243 9243 8180 9846 9846 9846 ...
## $ CTX_LH_INFERIORTEMPORAL_SUVR : num 2.16 1.97 2.05 2.1 2.02 ...
## $ CTX_LH_INFERIORTEMPORAL_VOLUME : int 6606 6418 6418 7286 7286 7286 6869 9599 9599 9599 ...
## $ CTX_LH_INSULA_SUVR : num 1.51 1.64 1.65 1.51 1.48 ...
## $ CTX_LH_INSULA_VOLUME : int 6711 4654 4654 6003 6003 6003 5513 6597 6597 6597 ...
## $ CTX_LH_ISTHMUSCINGULATE_SUVR : num 1.9 1.81 1.82 1.79 1.94 ...
## $ CTX_LH_ISTHMUSCINGULATE_VOLUME : int 2283 2215 2215 1549 1549 1549 1944 2264 2264 2264 ...
## $ CTX_LH_LATERALOCCIPITAL_SUVR : num 2.39 2.06 1.99 1.92 2 ...
## $ CTX_LH_LATERALOCCIPITAL_VOLUME : int 8532 10148 10148 8292 8292 8292 10612 9404 9404 9404 ...
## $ CTX_LH_LINGUAL_SUVR : num 2.27 1.95 1.97 1.76 1.74 ...
## $ CTX_LH_LINGUAL_VOLUME : int 4329 4658 4658 5606 5606 5606 5435 6488 6488 6488 ...
## $ CTX_LH_MIDDLETEMPORAL_SUVR : num 2.2 2.06 1.89 2.04 1.99 ...
## $ CTX_LH_MIDDLETEMPORAL_VOLUME : int 7445 8322 8322 7292 7292 7292 8031 9467 9467 9467 ...
## $ CTX_LH_PARACENTRAL_SUVR : num 1.99 1.79 1.8 1.91 1.8 ...
## $ CTX_LH_PARACENTRAL_VOLUME : int 2672 2890 2890 3231 3231 3231 3358 3173 3173 3173 ...
## $ CTX_LH_PARAHIPPOCAMPAL_SUVR : num 1.6 1.86 1.92 1.72 1.66 ...
## $ CTX_LH_PARAHIPPOCAMPAL_VOLUME : int 1659 1549 1549 1900 1900 1900 1989 2296 2296 2296 ...
## $ CTX_LH_PERICALCARINE_SUVR : num 2.23 1.45 1.41 1.56 1.54 ...
## $ CTX_LH_PERICALCARINE_VOLUME : int 1678 2004 2004 1866 1866 1866 1918 1927 1927 1927 ...
## $ CTX_LH_POSTCENTRAL_SUVR : num 2.03 1.81 1.82 1.85 1.78 ...
## $ CTX_LH_POSTCENTRAL_VOLUME : int 8281 8428 8428 8275 8275 8275 7580 8976 8976 8976 ...
## $ CTX_LH_POSTERIORCINGULATE_SUVR : num 1.82 1.89 1.84 1.72 1.67 ...
## $ CTX_LH_POSTERIORCINGULATE_VOLUME : int 2439 2608 2608 2683 2683 2683 2573 2638 2638 2638 ...
## $ CTX_LH_PRECENTRAL_SUVR : num 1.91 1.85 1.75 1.62 1.61 ...
## $ CTX_LH_PRECENTRAL_VOLUME : int 11174 12349 12349 10924 10924 10924 10820 12307 12307 12307 ...
## $ CTX_LH_PRECUNEUS_SUVR : num 1.93 1.89 1.94 1.81 1.81 ...
## $ CTX_LH_PRECUNEUS_VOLUME : int 7870 8313 8313 8387 8387 8387 8311 8584 8584 8584 ...
## $ CTX_LH_ROSTRALANTERIORCINGULATE_SUVR : num 1.71 1.58 1.49 1.59 1.48 ...
## $ CTX_LH_ROSTRALANTERIORCINGULATE_VOLUME: int 2928 2448 2448 1695 1695 1695 2466 2915 2915 2915 ...
## $ CTX_LH_SUPERIORFRONTAL_SUVR : num 1.86 1.86 1.74 1.84 1.77 ...
## [list output truncated]
tau.df <- tau.df %>%
select(-VISCODE, -update_stamp, -HEMIWM_SUVR, -BRAAK12_SUVR,
-BRAAK34_SUVR, -BRAAK56_SUVR, -OTHER_SUVR) %>%
select(!matches("VOLUME")) %>%
group_by(RID)
colnames(tau.df) <- str_replace_all(colnames(tau.df), "_SUVR", "")
str(tau.df)
## tibble [1,120 x 78] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ RID : int [1:1120] 21 31 31 56 56 56 59 69 69 69 ...
## $ VISCODE2 : Factor w/ 16 levels "","bl","m108",..: 7 7 8 7 8 9 7 7 8 9 ...
## $ EXAMDATE : Date[1:1120], format: "2018-02-02" "2018-04-24" "2019-04-23" "2018-02-20" ...
## $ INFERIOR_CEREBGM : num [1:1120] 1.32 1.33 1.33 1.28 1.24 ...
## $ BRAIN_STEM : num [1:1120] 1.27 1.12 1.12 1.2 1.17 ...
## $ LEFT_MIDDLEFR : num [1:1120] 2.02 1.93 1.8 1.83 1.78 ...
## $ LEFT_ORBITOFR : num [1:1120] 2.17 2.03 1.92 2.11 1.98 ...
## $ LEFT_PARSFR : num [1:1120] 2.02 2.01 1.98 2.03 1.99 ...
## $ LEFT_ACCUMBENS_AREA : num [1:1120] 1.14 1.04 1.79 1.12 1.18 ...
## $ LEFT_AMYGDALA : num [1:1120] 1.31 1.54 1.63 1.42 1.37 ...
## $ LEFT_CAUDATE : num [1:1120] 2.08 1.46 1.34 1.95 1.83 ...
## $ LEFT_HIPPOCAMPUS : num [1:1120] 2.12 1.96 2.2 1.69 1.73 ...
## $ LEFT_PALLIDUM : num [1:1120] 3.79 1.89 1.95 2.5 2.6 ...
## $ LEFT_PUTAMEN : num [1:1120] 1.69 1.64 1.42 1.9 1.78 ...
## $ LEFT_THALAMUS_PROPER : num [1:1120] 1.45 1.32 1.24 1.54 1.53 ...
## $ RIGHT_MIDDLEFR : num [1:1120] 2.08 1.91 1.8 1.94 1.85 ...
## $ RIGHT_ORBITOFR : num [1:1120] 2.19 2.01 1.86 2.17 2.03 ...
## $ RIGHT_PARSFR : num [1:1120] 2.17 2.08 1.9 2.09 2.01 ...
## $ RIGHT_ACCUMBENS_AREA : num [1:1120] 1.41 1.65 1.66 1.01 1.07 ...
## $ RIGHT_AMYGDALA : num [1:1120] 1.18 1.79 1.89 1.37 1.44 ...
## $ RIGHT_CAUDATE : num [1:1120] 2.01 1.57 1.37 1.96 1.89 ...
## $ RIGHT_HIPPOCAMPUS : num [1:1120] 2.01 2.09 2.03 1.62 1.64 ...
## $ RIGHT_PALLIDUM : num [1:1120] 3.01 2.32 2.12 2.33 2.48 ...
## $ RIGHT_PUTAMEN : num [1:1120] 1.68 1.62 1.53 2.06 1.94 ...
## $ RIGHT_THALAMUS_PROPER : num [1:1120] 1.42 1.33 1.24 1.52 1.55 ...
## $ CHOROID : num [1:1120] 7.45 4.56 4.31 3.84 3.79 ...
## $ CTX_LH_BANKSSTS : num [1:1120] 1.75 1.49 1.6 1.7 1.63 ...
## $ CTX_LH_CAUDALANTERIORCINGULATE : num [1:1120] 1.67 1.73 1.65 1.69 1.69 ...
## $ CTX_LH_CUNEUS : num [1:1120] 2.33 2.2 2.05 2.01 2 ...
## $ CTX_LH_ENTORHINAL : num [1:1120] 2.07 2.3 2.43 2.79 2.52 ...
## $ CTX_LH_FUSIFORM : num [1:1120] 1.97 1.87 1.83 1.84 1.77 ...
## $ CTX_LH_INFERIORPARIETAL : num [1:1120] 1.99 1.95 1.94 1.85 1.89 ...
## $ CTX_LH_INFERIORTEMPORAL : num [1:1120] 2.16 1.97 2.05 2.1 2.02 ...
## $ CTX_LH_INSULA : num [1:1120] 1.51 1.64 1.65 1.51 1.48 ...
## $ CTX_LH_ISTHMUSCINGULATE : num [1:1120] 1.9 1.81 1.82 1.79 1.94 ...
## $ CTX_LH_LATERALOCCIPITAL : num [1:1120] 2.39 2.06 1.99 1.92 2 ...
## $ CTX_LH_LINGUAL : num [1:1120] 2.27 1.95 1.97 1.76 1.74 ...
## $ CTX_LH_MIDDLETEMPORAL : num [1:1120] 2.2 2.06 1.89 2.04 1.99 ...
## $ CTX_LH_PARACENTRAL : num [1:1120] 1.99 1.79 1.8 1.91 1.8 ...
## $ CTX_LH_PARAHIPPOCAMPAL : num [1:1120] 1.6 1.86 1.92 1.72 1.66 ...
## $ CTX_LH_PERICALCARINE : num [1:1120] 2.23 1.45 1.41 1.56 1.54 ...
## $ CTX_LH_POSTCENTRAL : num [1:1120] 2.03 1.81 1.82 1.85 1.78 ...
## $ CTX_LH_POSTERIORCINGULATE : num [1:1120] 1.82 1.89 1.84 1.72 1.67 ...
## $ CTX_LH_PRECENTRAL : num [1:1120] 1.91 1.85 1.75 1.62 1.61 ...
## $ CTX_LH_PRECUNEUS : num [1:1120] 1.93 1.89 1.94 1.81 1.81 ...
## $ CTX_LH_ROSTRALANTERIORCINGULATE: num [1:1120] 1.71 1.58 1.49 1.59 1.48 ...
## $ CTX_LH_SUPERIORFRONTAL : num [1:1120] 1.86 1.86 1.74 1.84 1.77 ...
## $ CTX_LH_SUPERIORPARIETAL : num [1:1120] 1.88 1.99 1.97 1.9 1.87 ...
## $ CTX_LH_SUPERIORTEMPORAL : num [1:1120] 2.06 1.92 1.83 1.85 1.76 ...
## $ CTX_LH_SUPRAMARGINAL : num [1:1120] 1.93 1.83 1.74 1.81 1.81 ...
## $ CTX_LH_TEMPORALPOLE : num [1:1120] 2.23 2.05 1.87 1.96 1.8 ...
## $ CTX_LH_TRANSVERSETEMPORAL : num [1:1120] 1.75 1.73 1.72 1.67 1.47 ...
## $ CTX_RH_BANKSSTS : num [1:1120] 1.78 1.85 1.68 1.68 1.65 ...
## $ CTX_RH_CAUDALANTERIORCINGULATE : num [1:1120] 1.75 1.78 1.66 1.76 1.8 ...
## $ CTX_RH_CUNEUS : num [1:1120] 2.45 2.25 2.19 2.13 2.11 ...
## $ CTX_RH_ENTORHINAL : num [1:1120] 1.99 3.74 3.46 2.52 2.34 ...
## $ CTX_RH_FUSIFORM : num [1:1120] 1.98 1.79 1.75 1.8 1.71 ...
## $ CTX_RH_INFERIORPARIETAL : num [1:1120] 1.96 1.93 1.82 1.81 1.82 ...
## $ CTX_RH_INFERIORTEMPORAL : num [1:1120] 2.25 1.97 1.81 2.01 1.98 ...
## $ CTX_RH_INSULA : num [1:1120] 1.49 1.61 1.48 1.58 1.45 ...
## $ CTX_RH_ISTHMUSCINGULATE : num [1:1120] 2.03 1.86 1.85 1.85 1.81 ...
## $ CTX_RH_LATERALOCCIPITAL : num [1:1120] 2.27 1.9 1.89 1.86 1.92 ...
## $ CTX_RH_LINGUAL : num [1:1120] 2.12 2.07 1.88 1.87 1.83 ...
## $ CTX_RH_MIDDLETEMPORAL : num [1:1120] 2.2 1.85 1.81 2 1.94 ...
## $ CTX_RH_PARACENTRAL : num [1:1120] 1.82 1.79 1.63 1.89 1.89 ...
## $ CTX_RH_PARAHIPPOCAMPAL : num [1:1120] 1.52 1.94 1.94 1.84 1.74 ...
## $ CTX_RH_PERICALCARINE : num [1:1120] 2.02 2.07 2 1.6 1.62 ...
## $ CTX_RH_POSTCENTRAL : num [1:1120] 1.91 1.9 1.74 1.81 1.76 ...
## $ CTX_RH_POSTERIORCINGULATE : num [1:1120] 1.79 1.79 1.68 1.68 1.77 ...
## $ CTX_RH_PRECENTRAL : num [1:1120] 1.72 1.87 1.77 1.71 1.71 ...
## $ CTX_RH_PRECUNEUS : num [1:1120] 1.86 1.82 1.75 1.85 1.86 ...
## $ CTX_RH_ROSTRALANTERIORCINGULATE: num [1:1120] 1.78 1.63 1.57 1.52 1.49 ...
## $ CTX_RH_SUPERIORFRONTAL : num [1:1120] 1.86 1.9 1.81 1.82 1.79 ...
## $ CTX_RH_SUPERIORPARIETAL : num [1:1120] 1.78 2 1.9 1.93 1.92 ...
## $ CTX_RH_SUPERIORTEMPORAL : num [1:1120] 1.97 1.92 1.82 1.89 1.83 ...
## $ CTX_RH_SUPRAMARGINAL : num [1:1120] 1.8 1.81 1.76 1.8 1.79 ...
## $ CTX_RH_TEMPORALPOLE : num [1:1120] 2.29 2.3 2.21 1.98 1.75 ...
## $ CTX_RH_TRANSVERSETEMPORAL : num [1:1120] 1.96 2.14 1.91 1.62 1.55 ...
## - attr(*, "groups")= tibble [776 x 2] (S3: tbl_df/tbl/data.frame)
## ..$ RID : int [1:776] 21 31 56 59 69 96 112 120 127 142 ...
## ..$ .rows: list<int> [1:776]
## .. ..$ : int 1
## .. ..$ : int [1:2] 2 3
## .. ..$ : int [1:3] 4 5 6
## .. ..$ : int 7
## .. ..$ : int [1:3] 8 9 10
## .. ..$ : int [1:3] 11 12 13
## .. ..$ : int [1:3] 14 15 16
## .. ..$ : int 17
## .. ..$ : int 18
## .. ..$ : int 19
## .. ..$ : int 20
## .. ..$ : int 21
## .. ..$ : int 22
## .. ..$ : int 23
## .. ..$ : int [1:3] 24 25 26
## .. ..$ : int 27
## .. ..$ : int [1:2] 28 29
## .. ..$ : int 30
## .. ..$ : int 31
## .. ..$ : int [1:2] 32 33
## .. ..$ : int 34
## .. ..$ : int 35
## .. ..$ : int [1:2] 36 37
## .. ..$ : int 38
## .. ..$ : int [1:2] 39 40
## .. ..$ : int [1:2] 41 42
## .. ..$ : int 43
## .. ..$ : int [1:2] 44 45
## .. ..$ : int 46
## .. ..$ : int 47
## .. ..$ : int [1:3] 48 49 50
## .. ..$ : int [1:2] 51 52
## .. ..$ : int [1:3] 53 54 55
## .. ..$ : int 56
## .. ..$ : int [1:2] 57 58
## .. ..$ : int 59
## .. ..$ : int 60
## .. ..$ : int 61
## .. ..$ : int 62
## .. ..$ : int 63
## .. ..$ : int 64
## .. ..$ : int 65
## .. ..$ : int 66
## .. ..$ : int 67
## .. ..$ : int [1:4] 68 69 70 71
## .. ..$ : int 72
## .. ..$ : int [1:2] 73 74
## .. ..$ : int [1:2] 75 76
## .. ..$ : int 77
## .. ..$ : int 78
## .. ..$ : int [1:3] 79 80 81
## .. ..$ : int [1:3] 82 83 84
## .. ..$ : int 85
## .. ..$ : int 86
## .. ..$ : int 87
## .. ..$ : int [1:2] 88 89
## .. ..$ : int 90
## .. ..$ : int 91
## .. ..$ : int 92
## .. ..$ : int [1:3] 93 94 95
## .. ..$ : int [1:2] 96 97
## .. ..$ : int 98
## .. ..$ : int [1:2] 99 100
## .. ..$ : int 101
## .. ..$ : int 102
## .. ..$ : int 103
## .. ..$ : int 104
## .. ..$ : int [1:5] 105 106 107 108 109
## .. ..$ : int 110
## .. ..$ : int [1:3] 111 112 113
## .. ..$ : int [1:2] 114 115
## .. ..$ : int 116
## .. ..$ : int [1:4] 117 118 119 120
## .. ..$ : int [1:2] 121 122
## .. ..$ : int [1:4] 123 124 125 126
## .. ..$ : int [1:3] 127 128 129
## .. ..$ : int 130
## .. ..$ : int 131
## .. ..$ : int 132
## .. ..$ : int 133
## .. ..$ : int 134
## .. ..$ : int [1:2] 135 136
## .. ..$ : int 137
## .. ..$ : int 138
## .. ..$ : int [1:2] 139 140
## .. ..$ : int 141
## .. ..$ : int 142
## .. ..$ : int 143
## .. ..$ : int [1:3] 144 145 146
## .. ..$ : int [1:2] 147 148
## .. ..$ : int 149
## .. ..$ : int [1:2] 150 151
## .. ..$ : int 152
## .. ..$ : int 153
## .. ..$ : int 154
## .. ..$ : int 155
## .. ..$ : int 156
## .. ..$ : int 157
## .. ..$ : int 158
## .. .. [list output truncated]
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
As shown in Data Understanding, the ROIs are not precisely standardized to the inferior cerebellum gray matter SUVR. I will re-standardize each region’s ROI SUVR values here.
tau.stand <- tau.df
for (i in 4:ncol(tau.stand)) {
tau.stand[i] <- tau.stand[i]/ tau.df[4]
}
rm(tau.df)
Standardization can be verified using summary:
summary(tau.stand$INFERIOR_CEREBGM)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
Now that regional SUVR is properly standardized, the next step is to select brain regions based on a priori knowledge of where and how tau affects the brain in MCI/AD. I am going to stratify the cortical parcellations and subcortical segmentations based on Schöll et al. (2016) and per UCSF’s recommendations for usage of their tau-PET data. Here is the stratification across the Braak stages:
roi.braak <- read.csv("roi_braak_stages.csv") %>% mutate(ROI_Name = tolower(ROI_Name)) %>%
mutate(Hemisphere = ifelse(str_detect(ROI_Name, "rh_|right"), "Right", "Left"))
datatable(roi.braak %>% select(-Base_Name))
The following plots show the spatial relationship of the Braak stages in the brain, in the cortical (top) and subcortical (bottom) ROIs:
ggseg.aparc <- read_excel("ggseg_roi.xlsx", sheet=1) %>%
mutate(Braak=as.numeric(as.roman(Braak)))
ggseg.aseg <- read_excel("ggseg_roi.xlsx", sheet=2) %>%
mutate(Braak=as.numeric(as.roman(Braak)))
myfont <- list(
family = "sans",
size = 14)
p1 <- data.frame(region=ggseg.aparc$ggseg_ROI,
Braak=as.character(ggseg.aparc$Braak), stringsAsFactors = F) %>%
ggseg(atlas="dk", mapping=aes(fill=Braak, label=region)) +
scale_fill_discrete(breaks = c("1", "3", "4", "5", "6")) +
theme(axis.text.y=element_blank(),
axis.text.x = element_text(family="calibri"),
axis.title.x = element_text(family="calibri"),
legend.title=element_text(family="calibri"),
legend.text=element_text(family="calibri"))
ggplotly(p1, tooltip = c("fill", "label"), width=800, height=300)
p2 <- data.frame(region=ggseg.aseg$ggseg_ROI,
Braak=as.character(ggseg.aseg$Braak), stringsAsFactors = F) %>%
ggseg(atlas="aseg", mapping=aes(fill=Braak, label=region)) +
scale_fill_discrete(breaks = c("2", "3")) +
theme(axis.text.y=element_blank(),
axis.text.x = element_text(family="calibri"),
axis.title.x = element_text(family="calibri"),
legend.title=element_text(family="calibri"),
legend.text=element_text(family="calibri"))
ggplotly(p2, tooltip=c("fill", "label"), width=800, height=300)
I will filter the tau-PET dataset to only include SUVR data for ROIs detailed in the above list, by first reshaping the tau-PET SUVR data from wide to long. Then, I will merge left and right hemisphere ROIs into one bilateral ROI by taking the mean SUVR.
tau.stand.roi <- tau.stand %>%
pivot_longer(., cols=c(-RID, -VISCODE2, -EXAMDATE), names_to="ROI_Name", values_to="SUVR") %>%
mutate(ROI_Name=tolower(ROI_Name)) %>%
semi_join(., roi.braak) %>%
left_join(., roi.braak) %>%
mutate(ROI_Name = str_replace_all(ROI_Name, "right_|left_|ctx_rh_|ctx_lh_", "")) %>%
dplyr::group_by(RID, VISCODE2, EXAMDATE, ROI_Name, Braak) %>%
dplyr::summarise(SUVR = mean(SUVR, na.rm=T))
data.frame(ROI=unique(tau.stand.roi$ROI_Name)) %>%
left_join(., roi.braak, by=c("ROI"="Base_Name")) %>%
select(-ROI_Name, -Hemisphere) %>%
distinct() %>%
datatable()
Now, I will re-shape the tau-PET data back to wide to be compatible with the cognitive status data shape.
tau.stand.roi <- tau.stand.roi %>%
select(-Braak) %>%
pivot_wider(id_cols=c(RID, VISCODE2, EXAMDATE), names_from="ROI_Name",
values_from="SUVR")
str(tau.stand.roi)
## tibble [1,120 x 34] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ RID : int [1:1120] 21 31 31 56 56 56 59 69 69 69 ...
## $ VISCODE2 : Factor w/ 16 levels "","bl","m108",..: 7 7 8 7 8 9 7 7 8 9 ...
## $ EXAMDATE : Date[1:1120], format: "2018-02-02" "2018-04-24" "2019-04-23" "2018-02-20" ...
## $ amygdala : num [1:1120] 0.943 1.254 1.327 1.09 1.127 ...
## $ bankssts : num [1:1120] 1.34 1.26 1.23 1.32 1.32 ...
## $ caudalanteriorcingulate : num [1:1120] 1.29 1.32 1.25 1.35 1.4 ...
## $ cuneus : num [1:1120] 1.81 1.67 1.6 1.62 1.65 ...
## $ entorhinal : num [1:1120] 1.54 2.28 2.22 2.07 1.95 ...
## $ fusiform : num [1:1120] 1.5 1.38 1.35 1.42 1.4 ...
## $ hippocampus : num [1:1120] 1.56 1.52 1.59 1.29 1.36 ...
## $ inferiorparietal : num [1:1120] 1.5 1.46 1.42 1.43 1.49 ...
## $ inferiortemporal : num [1:1120] 1.67 1.48 1.45 1.61 1.61 ...
## $ insula : num [1:1120] 1.14 1.22 1.18 1.21 1.18 ...
## $ isthmuscingulate : num [1:1120] 1.49 1.38 1.38 1.42 1.51 ...
## $ lateraloccipital : num [1:1120] 1.77 1.49 1.46 1.47 1.58 ...
## $ lingual : num [1:1120] 1.66 1.52 1.45 1.42 1.43 ...
## $ middlefr : num [1:1120] 1.55 1.44 1.36 1.48 1.46 ...
## $ middletemporal : num [1:1120] 1.67 1.47 1.39 1.58 1.58 ...
## $ orbitofr : num [1:1120] 1.65 1.52 1.43 1.67 1.61 ...
## $ paracentral : num [1:1120] 1.44 1.35 1.3 1.48 1.48 ...
## $ parahippocampal : num [1:1120] 1.18 1.43 1.45 1.39 1.37 ...
## $ parsfr : num [1:1120] 1.59 1.54 1.46 1.61 1.61 ...
## $ pericalcarine : num [1:1120] 1.61 1.33 1.29 1.23 1.27 ...
## $ postcentral : num [1:1120] 1.49 1.4 1.34 1.43 1.42 ...
## $ posteriorcingulate : num [1:1120] 1.37 1.39 1.33 1.33 1.38 ...
## $ precentral : num [1:1120] 1.37 1.4 1.33 1.3 1.34 ...
## $ precuneus : num [1:1120] 1.43 1.4 1.39 1.43 1.48 ...
## $ rostralanteriorcingulate: num [1:1120] 1.32 1.21 1.15 1.21 1.2 ...
## $ superiorfrontal : num [1:1120] 1.41 1.42 1.34 1.43 1.43 ...
## $ superiorparietal : num [1:1120] 1.39 1.5 1.46 1.49 1.52 ...
## $ superiortemporal : num [1:1120] 1.53 1.45 1.38 1.46 1.44 ...
## $ supramarginal : num [1:1120] 1.41 1.37 1.32 1.41 1.45 ...
## $ temporalpole : num [1:1120] 1.71 1.64 1.54 1.54 1.43 ...
## $ transversetemporal : num [1:1120] 1.41 1.46 1.37 1.28 1.21 ...
## - attr(*, "groups")= tibble [1,120 x 4] (S3: tbl_df/tbl/data.frame)
## ..$ RID : int [1:1120] 21 31 31 56 56 56 59 69 69 69 ...
## ..$ VISCODE2: Factor w/ 16 levels "","bl","m108",..: 7 7 8 7 8 9 7 7 8 9 ...
## ..$ EXAMDATE: Date[1:1120], format: "2018-02-02" "2018-04-24" "2019-04-23" ...
## ..$ .rows : list<int> [1:1120]
## .. ..$ : int 1
## .. ..$ : int 2
## .. ..$ : int 3
## .. ..$ : int 4
## .. ..$ : int 5
## .. ..$ : int 6
## .. ..$ : int 7
## .. ..$ : int 8
## .. ..$ : int 9
## .. ..$ : int 10
## .. ..$ : int 11
## .. ..$ : int 12
## .. ..$ : int 13
## .. ..$ : int 14
## .. ..$ : int 15
## .. ..$ : int 16
## .. ..$ : int 17
## .. ..$ : int 18
## .. ..$ : int 19
## .. ..$ : int 20
## .. ..$ : int 21
## .. ..$ : int 22
## .. ..$ : int 23
## .. ..$ : int 24
## .. ..$ : int 25
## .. ..$ : int 26
## .. ..$ : int 27
## .. ..$ : int 28
## .. ..$ : int 29
## .. ..$ : int 30
## .. ..$ : int 31
## .. ..$ : int 32
## .. ..$ : int 33
## .. ..$ : int 34
## .. ..$ : int 35
## .. ..$ : int 36
## .. ..$ : int 37
## .. ..$ : int 38
## .. ..$ : int 39
## .. ..$ : int 40
## .. ..$ : int 41
## .. ..$ : int 42
## .. ..$ : int 43
## .. ..$ : int 44
## .. ..$ : int 45
## .. ..$ : int 46
## .. ..$ : int 47
## .. ..$ : int 48
## .. ..$ : int 49
## .. ..$ : int 50
## .. ..$ : int 51
## .. ..$ : int 52
## .. ..$ : int 53
## .. ..$ : int 54
## .. ..$ : int 55
## .. ..$ : int 56
## .. ..$ : int 57
## .. ..$ : int 58
## .. ..$ : int 59
## .. ..$ : int 60
## .. ..$ : int 61
## .. ..$ : int 62
## .. ..$ : int 63
## .. ..$ : int 64
## .. ..$ : int 65
## .. ..$ : int 66
## .. ..$ : int 67
## .. ..$ : int 68
## .. ..$ : int 69
## .. ..$ : int 70
## .. ..$ : int 71
## .. ..$ : int 72
## .. ..$ : int 73
## .. ..$ : int 74
## .. ..$ : int 75
## .. ..$ : int 76
## .. ..$ : int 77
## .. ..$ : int 78
## .. ..$ : int 79
## .. ..$ : int 80
## .. ..$ : int 81
## .. ..$ : int 82
## .. ..$ : int 83
## .. ..$ : int 84
## .. ..$ : int 85
## .. ..$ : int 86
## .. ..$ : int 87
## .. ..$ : int 88
## .. ..$ : int 89
## .. ..$ : int 90
## .. ..$ : int 91
## .. ..$ : int 92
## .. ..$ : int 93
## .. ..$ : int 94
## .. ..$ : int 95
## .. ..$ : int 96
## .. ..$ : int 97
## .. ..$ : int 98
## .. ..$ : int 99
## .. .. [list output truncated]
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
ADNI compiled a merged dataset containing key information from several tables, including subject demographics, selected cognitive assessment scores, and select biomarker data.
I am interested in the following features in this dataset:
RID: Participant roster ID, which serves as unique subject identifierVISCODE: Visit codeEXAMDATE: DateAGE: Age at visitPTGENDER: Biological sexCDRSB: CDR Sum-of-Boxes score at visitDX: Current cognitive diagnosissubj.info <- read.csv("../../ADNI_Data/Raw_Data/ADNIMERGE.csv", stringsAsFactors = T, na.strings="")
subj.info <- subj.info %>% select(RID, VISCODE, AGE, PTGENDER, CDRSB, DX)
I actually can’t join the two datasets on the EXAMDATE feature, as these sometimes differ by one or two days depending on when the records were entered. Instead, I will join by the RID subject identifier and VISCODE, a visit code identifier.
full.df <- inner_join(tau.stand.roi, subj.info, by=c("RID", "VISCODE2"="VISCODE")) %>%
filter(!is.na(CDRSB)) %>%
group_by(RID) %>%
dplyr::mutate(n_visits = n()) %>%
filter(n_visits>1) %>%
select(-n_visits)
Click to see the structure of this merged dataset:
str(full.df)
## tibble [576 x 38] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ RID : int [1:576] 31 31 56 56 56 69 69 69 96 96 ...
## $ VISCODE2 : Factor w/ 30 levels "","bl","m108",..: 7 8 7 8 9 7 8 9 5 6 ...
## $ EXAMDATE : Date[1:576], format: "2018-04-24" "2019-04-23" "2018-02-20" "2019-01-10" ...
## $ amygdala : num [1:576] 1.25 1.33 1.09 1.13 1.05 ...
## $ bankssts : num [1:576] 1.26 1.23 1.32 1.32 1.3 ...
## $ caudalanteriorcingulate : num [1:576] 1.32 1.25 1.35 1.4 1.21 ...
## $ cuneus : num [1:576] 1.67 1.6 1.62 1.65 1.56 ...
## $ entorhinal : num [1:576] 2.28 2.22 2.07 1.95 1.98 ...
## $ fusiform : num [1:576] 1.38 1.35 1.42 1.4 1.34 ...
## $ hippocampus : num [1:576] 1.52 1.59 1.29 1.36 1.34 ...
## $ inferiorparietal : num [1:576] 1.46 1.42 1.43 1.49 1.45 ...
## $ inferiortemporal : num [1:576] 1.48 1.45 1.61 1.61 1.58 ...
## $ insula : num [1:576] 1.22 1.18 1.21 1.18 1.08 ...
## $ isthmuscingulate : num [1:576] 1.38 1.38 1.42 1.51 1.36 ...
## $ lateraloccipital : num [1:576] 1.49 1.46 1.47 1.58 1.59 ...
## $ lingual : num [1:576] 1.52 1.45 1.42 1.43 1.38 ...
## $ middlefr : num [1:576] 1.44 1.36 1.48 1.46 1.42 ...
## $ middletemporal : num [1:576] 1.47 1.39 1.58 1.58 1.56 ...
## $ orbitofr : num [1:576] 1.52 1.43 1.67 1.61 1.5 ...
## $ paracentral : num [1:576] 1.35 1.3 1.48 1.48 1.41 ...
## $ parahippocampal : num [1:576] 1.43 1.45 1.39 1.37 1.29 ...
## $ parsfr : num [1:576] 1.54 1.46 1.61 1.61 1.5 ...
## $ pericalcarine : num [1:576] 1.33 1.29 1.23 1.27 1.22 ...
## $ postcentral : num [1:576] 1.4 1.34 1.43 1.42 1.34 ...
## $ posteriorcingulate : num [1:576] 1.39 1.33 1.33 1.38 1.25 ...
## $ precentral : num [1:576] 1.4 1.33 1.3 1.34 1.25 ...
## $ precuneus : num [1:576] 1.4 1.39 1.43 1.48 1.37 ...
## $ rostralanteriorcingulate: num [1:576] 1.21 1.15 1.21 1.2 1.06 ...
## $ superiorfrontal : num [1:576] 1.42 1.34 1.43 1.43 1.35 ...
## $ superiorparietal : num [1:576] 1.5 1.46 1.49 1.52 1.43 ...
## $ superiortemporal : num [1:576] 1.45 1.38 1.46 1.44 1.37 ...
## $ supramarginal : num [1:576] 1.37 1.32 1.41 1.45 1.37 ...
## $ temporalpole : num [1:576] 1.64 1.54 1.54 1.43 1.47 ...
## $ transversetemporal : num [1:576] 1.46 1.37 1.28 1.21 1.06 ...
## $ AGE : num [1:576] 77.7 77.7 69.6 69.6 69.6 72.9 72.9 72.9 79.6 79.6 ...
## $ PTGENDER : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 2 2 2 2 2 ...
## $ CDRSB : num [1:576] 0 0 0.5 0.5 0 0.5 0 0.5 0 0 ...
## $ DX : Factor w/ 3 levels "CN","Dementia",..: 1 1 3 3 3 3 3 3 1 1 ...
## - attr(*, "groups")= tibble [243 x 2] (S3: tbl_df/tbl/data.frame)
## ..$ RID : int [1:243] 31 56 69 96 112 377 416 467 618 668 ...
## ..$ .rows: list<int> [1:243]
## .. ..$ : int [1:2] 1 2
## .. ..$ : int [1:3] 3 4 5
## .. ..$ : int [1:3] 6 7 8
## .. ..$ : int [1:3] 9 10 11
## .. ..$ : int [1:3] 12 13 14
## .. ..$ : int [1:3] 15 16 17
## .. ..$ : int [1:2] 18 19
## .. ..$ : int [1:2] 20 21
## .. ..$ : int [1:2] 22 23
## .. ..$ : int [1:2] 24 25
## .. ..$ : int [1:2] 26 27
## .. ..$ : int [1:2] 28 29
## .. ..$ : int [1:3] 30 31 32
## .. ..$ : int [1:2] 33 34
## .. ..$ : int [1:3] 35 36 37
## .. ..$ : int [1:2] 38 39
## .. ..$ : int [1:4] 40 41 42 43
## .. ..$ : int [1:2] 44 45
## .. ..$ : int [1:3] 46 47 48
## .. ..$ : int [1:3] 49 50 51
## .. ..$ : int [1:3] 52 53 54
## .. ..$ : int [1:2] 55 56
## .. ..$ : int [1:2] 57 58
## .. ..$ : int [1:4] 59 60 61 62
## .. ..$ : int [1:3] 63 64 65
## .. ..$ : int [1:2] 66 67
## .. ..$ : int [1:4] 68 69 70 71
## .. ..$ : int [1:2] 72 73
## .. ..$ : int [1:4] 74 75 76 77
## .. ..$ : int [1:3] 78 79 80
## .. ..$ : int [1:2] 81 82
## .. ..$ : int [1:2] 83 84
## .. ..$ : int [1:3] 85 86 87
## .. ..$ : int [1:2] 88 89
## .. ..$ : int [1:2] 90 91
## .. ..$ : int [1:3] 92 93 94
## .. ..$ : int [1:2] 95 96
## .. ..$ : int [1:4] 97 98 99 100
## .. ..$ : int [1:2] 101 102
## .. ..$ : int [1:2] 103 104
## .. ..$ : int [1:2] 105 106
## .. ..$ : int [1:2] 107 108
## .. ..$ : int [1:3] 109 110 111
## .. ..$ : int [1:3] 112 113 114
## .. ..$ : int [1:4] 115 116 117 118
## .. ..$ : int [1:2] 119 120
## .. ..$ : int [1:2] 121 122
## .. ..$ : int [1:2] 123 124
## .. ..$ : int [1:2] 125 126
## .. ..$ : int [1:3] 127 128 129
## .. ..$ : int [1:3] 130 131 132
## .. ..$ : int [1:2] 133 134
## .. ..$ : int [1:2] 135 136
## .. ..$ : int [1:2] 137 138
## .. ..$ : int [1:2] 139 140
## .. ..$ : int [1:2] 141 142
## .. ..$ : int [1:3] 143 144 145
## .. ..$ : int [1:2] 146 147
## .. ..$ : int [1:2] 148 149
## .. ..$ : int [1:3] 150 151 152
## .. ..$ : int [1:4] 153 154 155 156
## .. ..$ : int [1:2] 157 158
## .. ..$ : int [1:2] 159 160
## .. ..$ : int [1:2] 161 162
## .. ..$ : int [1:3] 163 164 165
## .. ..$ : int [1:2] 166 167
## .. ..$ : int [1:3] 168 169 170
## .. ..$ : int [1:2] 171 172
## .. ..$ : int [1:2] 173 174
## .. ..$ : int [1:3] 175 176 177
## .. ..$ : int [1:2] 178 179
## .. ..$ : int [1:2] 180 181
## .. ..$ : int [1:3] 182 183 184
## .. ..$ : int [1:2] 185 186
## .. ..$ : int [1:3] 187 188 189
## .. ..$ : int [1:3] 190 191 192
## .. ..$ : int [1:2] 193 194
## .. ..$ : int [1:3] 195 196 197
## .. ..$ : int [1:4] 198 199 200 201
## .. ..$ : int [1:2] 202 203
## .. ..$ : int [1:2] 204 205
## .. ..$ : int [1:2] 206 207
## .. ..$ : int [1:2] 208 209
## .. ..$ : int [1:2] 210 211
## .. ..$ : int [1:2] 212 213
## .. ..$ : int [1:2] 214 215
## .. ..$ : int [1:2] 216 217
## .. ..$ : int [1:2] 218 219
## .. ..$ : int [1:4] 220 221 222 223
## .. ..$ : int [1:2] 224 225
## .. ..$ : int [1:4] 226 227 228 229
## .. ..$ : int [1:2] 230 231
## .. ..$ : int [1:2] 232 233
## .. ..$ : int [1:3] 234 235 236
## .. ..$ : int [1:3] 237 238 239
## .. ..$ : int [1:2] 240 241
## .. ..$ : int [1:2] 242 243
## .. ..$ : int [1:4] 244 245 246 247
## .. ..$ : int [1:3] 248 249 250
## .. .. [list output truncated]
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
cat("\nNumber of longitudinal tau-PET scans with accompanying cognitive data: **\n",
nrow(full.df), "**\nNumber of subjects in merged dataset: **",
length(unique(full.df$RID)), "**\n", "\n", sep="")
Number of longitudinal tau-PET scans with accompanying cognitive data: 576 Number of subjects in merged dataset: 243
As it turns out, only 588 of the original 593 tau-PET scans had corresponding cognitive assessments. This leaves 576 unique PET scan datapoints for 243 subjects.
Lastly, before I can perform outlier detection, I need to derive the longitudinal features upon which the prediction models will be built – namely, annual change in tau-PET SUVR and annual change in CDR-Sum of Boxes score.
annual.changes <- full.df %>%
ungroup() %>%
select(-AGE, -PTGENDER, -DX, -VISCODE2) %>%
pivot_longer(cols=c(-RID, -EXAMDATE), names_to="Metric",
values_to="Value") %>%
dplyr::group_by(RID, Metric) %>%
dplyr::summarise(n_years = as.numeric((EXAMDATE - lag(EXAMDATE,
default=EXAMDATE[1]))/365),
change = Value - lag(Value, default=Value[1])) %>%
filter(n_years > 0) %>%
dplyr::mutate(Annual_Change = change/n_years) %>%
select(-n_years, -change) %>%
group_by(RID, Metric) %>%
dplyr::mutate(interval_num = row_number()) %>%
pivot_wider(., id_cols=c(RID, interval_num), names_from=Metric,
values_from=Annual_Change)
datatable(annual.changes[1:5])
Now that the datasets are merged, I can perform outlier detection. Given the multivariate nature of this dataset (i.e. multiple brain regions), I will use Cook’s Distance to estimate the relative influence of each data point in a simple multiple regression model.
cooks.distance <- cooks.distance(lm(CDRSB ~ . - RID - interval_num, data=annual.changes))
p.cooks <- data.frame(CD=cooks.distance) %>%
rownames_to_column(var="Data_Point") %>%
mutate(Data_Point=as.numeric(Data_Point)) %>%
mutate(Label=ifelse(CD>30, Data_Point, NA_real_)) %>%
ggplot(data=., mapping=aes(x=Data_Point,y=CD)) +
geom_hline(yintercept = 4*mean(cooks.distance,na.rm=T), color="blue") +
geom_point() +
geom_text(aes(label=Label), nudge_y=1.5) +
ylab("Cook's Distance") +
xlab("annual.changes index") +
theme_minimal()
ggplotly(p.cooks)
rm(p.cooks)
All but one data point have relatively low Cook’s distance values, while data point #224 has a relatively large Cook’s distance. This suggests large residuals and leverage associated with this datapoint, which could distort model fitting and accuracy. Upon further examination of this instance:
as.data.frame(t(annual.changes[224,])) %>%
rownames_to_column(var="Variable") %>%
dplyr::rename("Value" = "V1") %>%
datatable()
This subject exhibits very large fluctuations in tau-PET SUVR values in several brain regions for this associated time interval. Given that SUVR values typically range from 0.75-2, changes of this large magnitude is surprising, and may certainly render this data point an outlier. Fortunately, the interval_num of 2 indicates that this is the second time interval for this subject, so omitting this interval doesn’t reduce the total number of subjects in the analysis. I will remove this data point:
annual.changes <- annual.changes[-224,]
I can now finish some aspects of data exploration that depended upon refining the subject cohort as well as the features. For starters, I will examine the distribution of annual tau change in each of the 26 ROIs:
annual.changes.tau <- annual.changes %>%
select(-CDRSB) %>%
ungroup() %>%
pivot_longer(cols=c(-RID, -interval_num), names_to="ROI",
values_to="deltaSUVR")
multi.hist(annual.changes %>% ungroup() %>% select(-RID, -interval_num, -CDRSB),
dcol="red")
The distribution looks reasonably normal for each ROI, and all of the curves peak around zero, suggesting all of the ROIs have a mean of ~0. Since there are both negative values and values of zero in these data, neither log nor square root transformation would be possible, anyway. Therefore, I will leave the variable distribution as-is.
Next, I will visualize the correlation in annual tau change between each of the ROIs measured:
annual.roi <- annual.changes %>% ungroup() %>% select(-RID, -interval_num, -CDRSB)
roi.cor <- cor(annual.roi)
p.mat <- cor_pmat(annual.roi)
ggcorrplot(roi.cor, hc.order = TRUE,
outline.col = "white") %>% ggplotly(width=800, height=700)
As it turns out, all ROIs show positive correlations in the annual rate of change in tau-PET uptake, with the exception of three ROI pairs:
These are very weak correlations, and can be further visualized with scatter plots:
p.select.rois <- ggpairs(annual.roi %>% select(entorhinal, pericalcarine, transversetemporal,
lateraloccipital),
lower = list(continuous = wrap("smooth", se=F,
method = "lm", color="lightslategray",
alpha=0.4))) +
theme_minimal()
ggplotly(p.select.rois, width=700, height=600)
These negative correlations are indeed weak and mostly noise, based on the scatter plots. Regarding the other positive correlations, I am curious as to whether there are underlying trends based on either spatial proximity and/or tau progression in AD, based on cortical lobe and Braak regions, respectively:
roi.cor.long <- as.data.frame(roi.cor) %>%
rownames_to_column(var="ROI1") %>%
pivot_longer(cols=c(-ROI1), names_to="ROI2", values_to="Pearson_Corr") %>%
filter(ROI1 != ROI2) %>%
left_join(., roi.braak, by=c("ROI1"="Base_Name")) %>%
select(-ROI_Name, -Hemisphere) %>%
dplyr::rename("ROI1_Braak" = "Braak", "ROI1_Cortex" = "Cortex") %>%
left_join(., roi.braak, by=c("ROI2" = "Base_Name")) %>%
select(-ROI_Name, -Hemisphere) %>%
dplyr::rename("ROI2_Braak" = "Braak", "ROI2_Cortex" = "Cortex") %>%
mutate_at(c("ROI1_Cortex", "ROI2_Cortex"), function(x) ifelse(x=="Insula", "Ins", x))
p.cor.cortical <- roi.cor.long %>%
ggplot(., mapping=aes(x=ROI1, y=ROI2)) +
geom_tile(mapping=aes(fill=Pearson_Corr)) +
labs(fill="Pearson Coefficient") +
theme_minimal() +
facet_grid(ROI2_Cortex ~ ROI1_Cortex, scales="free",
space="free", switch="both") +
ggtitle("Correlation in Annual Tau SUVR Change by Cortical Lobe") +
theme(axis.title=element_blank(),
axis.text.y = element_text(size=11),
axis.text.x = element_text(angle=90, size=11, hjust=1),
panel.border = element_blank(),
panel.grid = element_blank(),
plot.title=element_text(hjust=0.5)) +
theme(strip.placement = "outside") +
scale_fill_gradient2(low="#210DFC", mid="white", high="#FF171B",
limits=c(-1,1))
ggsave("ROI_Correlation_Cortical.png", plot=p.cor.cortical, width=9, height=7.5, units="in", dpi=300)
include_graphics("ROI_Correlation_Cortical.png")
There are somewhat stronger inter-correlations within the frontal and parietal cortices compared with other cortical lobes. Now stratifying based on ROI Braak stage:
p.cor.braak <- roi.cor.long %>%
ggplot(., mapping=aes(x=ROI1, y=ROI2)) +
geom_tile(mapping=aes(fill=Pearson_Corr)) +
labs(fill="Pearson Coefficient") +
theme_minimal() +
facet_grid(ROI2_Braak ~ ROI1_Braak, scales="free",
space="free", switch="both") +
ggtitle("Correlation in Annual Tau SUVR Change by Braak Stage") +
theme(axis.title=element_blank(),
axis.text.y = element_text(size=11),
axis.text.x = element_text(angle=90, size=11, hjust=1),
panel.border = element_blank(),
panel.grid = element_blank()) +
theme(strip.placement = "outside") +
scale_fill_gradient2(low="#210DFC", mid="white", high="#FF171B",
limits=c(-1,1))
ggsave("ROI_Correlation_Braak.png", plot=p.cor.braak, width=9, height=7.5, units="in", dpi=300)
include_graphics("ROI_Correlation_Braak.png")
This generally high correlation in annual tau-PET SUVR changes between cortical regions may pose a challenge when it comes to modeling, because of feature collinearity. While I want to keep each region distinct for biological context, I will also reduce the dimensionality of the data using principal component analysis (PCA), which has an added benefit of yielding orthogonal un-correlated components to serve as input for the modeling phase. Since all the variables are in the same unit (i.e. change in SUVR per year), I will only need to center the data, not scale it.
pca.df <- as.matrix(annual.changes %>% ungroup() %>% select(-RID, -CDRSB, -interval_num))
res.pca <- prcomp(pca.df, center=T, scale.=F)
# The variable info can be extracted as follows:
var <- get_pca_var(res.pca)
The proportion of variance explained by each principal component (PC) can be visualized using a Scree plot:
cumpro <- cumsum(res.pca$sdev^2 / sum(res.pca$sdev^2)*100)
variances <- data.frame((res.pca$sdev^2/sum(res.pca$sdev^2))*100)
variances$PC <- c(1:31)
variances$cumpro <- cumpro
colnames(variances) <- c("Variance_Proportion", "PC", "CumVar")
newrow <- subset(variances, PC == 31)
newrow$PC <- 31.5
variances <- plyr::rbind.fill(variances, newrow)
linecolors <- c("Component Variance" = "maroon4",
"Cumulative Variance" = "green4")
p.var <- variances %>%
ggplot(data=.) +
geom_bar(data=subset(variances, PC < 32), mapping=aes(x=PC, y=Variance_Proportion),
stat="identity", fill="steelblue") +
geom_line(aes(color="Component Variance", x=PC, y=Variance_Proportion),
size=0.7, data=subset(variances, PC < 31.1), show.legend=F) +
geom_line(aes(x=PC, y=CumVar, color="Cumulative Variance"), size=0.7,
data=subset(variances, PC < 32)) +
geom_point(aes(x=PC, y=Variance_Proportion),data=subset(variances, PC < 31.1), size=1.5) +
scale_colour_manual(name="",values=linecolors,
guide = guide_legend(override.aes=list(size=2))) +
theme_minimal() +
ylab("Percentage of Variance Explained") +
xlab("Principal Component") +
ggtitle("Principal Components\nContribution to Subject Variance") +
xlim(c(0.5, 31.5)) +
scale_y_continuous(breaks=seq(0, 100, 10),
sec.axis = dup_axis(name="")) +
theme(plot.title = element_text(hjust=0.5, size=14),
axis.text = element_text(size=12),
panel.grid = element_blank(),
legend.position="bottom",
legend.text = element_text(size=12))
ggplotly(p.var, tooltip=c("x","y")) %>%
layout(legend = list(orientation = "h", y=-0.2))
The first five principal components (PCs) collectively explain 77.2% of variance in the data; beyond these components, there are only marginal increases in the cumulative variance explained. Therefore, I will move forward with these first five PCs.
Individual ROI contributions (loadings) per component can be extracted:
loadings_wide <- data.frame(res.pca$rotation) %>%
cbind(rownames(.), .) %>%
remove_rownames() %>%
dplyr::rename("ROI" = "rownames(.)") %>%
select(ROI:PC5) %>%
rowwise() %>%
left_join(., roi.braak, by=c("ROI"="Base_Name")) %>%
select(ROI, Cortex, Braak, PC1:PC5) %>%
distinct()
datatable(loadings_wide %>% mutate_if(is.numeric, function(x) round(x,4)))
I’m curious as to whether ROIs exhibit similar covariance in annual tau-PET changes based on spatial proximity (i.e. cortical region) and/or similar Alzheimer’s Disease progression (i.e. Braak stage).
p.cortex <- loadings_wide %>%
ggplot(data=., mapping=aes(x=PC1, y=PC2, label=ROI)) +
geom_hline(yintercept=0, linetype=2, alpha=0.5) +
geom_vline(xintercept=0, linetype=2, alpha=0.5) +
geom_point(aes(color=Cortex), size=3) +
theme_minimal() +
xlab("PC1 (41.6% Variance)") +
ylab("PC2 (14.0% Variance)") +
ggtitle("ROI PC Loadings by Cortical Region") +
theme(plot.title=element_text(hjust=0.5))
ggplotly(p.cortex, tooltip=c("label", "x", "y"))
rm(p.cortex)
The first note is that all of the ROIs exhibit a negative loading (correlation) with PC1. Beyond that, all of the occipital and parietal cortex ROIs are positively correlated with PC2, while the insula, temporal cortex, and cingulate cortex ROIs are all negatively correlated with PC2. The frontal cortex ROIs are right on the border of PC2, low correlations in both directions.
library(ggrepel)
p.braak <- loadings_wide %>%
ggplot(data=., mapping=aes(x=PC1, y=PC2, label=ROI)) +
geom_hline(yintercept=0, linetype=2, alpha=0.5) +
geom_vline(xintercept=0, linetype=2, alpha=0.5) +
geom_point(aes(color=Braak), size=3) +
theme_minimal() +
xlab("PC1 (41.6% Variance)") +
ylab("PC2 (14.0% Variance)") +
ggtitle("ROI PC Loadings by Braak Stage") +
theme(plot.title=element_text(hjust=0.5))
ggplotly(p.braak, tooltip=c("label", "x", "y"))
rm(p.braak)
There is not as clear a distinction to be made based on ROI Braak stage. One observation that does stand out is that all of the Braak VI ROIs are relatively close in the upper right of the points. Beyond that, the Braak stages are mixed in this loading plot.
Moving on, the subject and time interval info can be linked with the PCA results:
post.pca <- as.data.frame(res.pca$x[,1:5]) %>%
cbind(., RID=annual.changes$RID) %>%
cbind(., interval_num=annual.changes$interval_num) %>%
cbind(., CDRSB=annual.changes$CDRSB) %>%
select(RID, interval_num, CDRSB, PC1:PC5)
datatable(post.pca %>% mutate_if(is.numeric, function(x) round(x,5)))
I’ll save these prepared datasets to an .RData file for modeling:
save(annual.changes, subj.info, post.pca, file="../Prepared_Data.RData")